require(tidyverse)
# load in the data
copepod = read.csv('GulfOilCopepod.csv')
# change some NAs to 0 as they ought to be
copepod$Metamorphosis[is.na(copepod$Metamorphosis)] = 0
copepod$Adult[is.na(copepod$Adult)] = 0
# eliminate the sex variable
# Make Clutch go from 1 to 16
### 1 = Before 1
### 16 = After 8
# Make Population go from 1 to 2
### 1 = Before
### 2 = After
# Make Oil go from 1 to 3
### 1 = 0%
### 2 = 75%
### 3 = 100%
# Make Vial go from 1 to 96
### Order is Clutch, Oil, Replicate
### 1 = Before | 1 | 0% | A
### 96 = After | 8 | 100% | B
### Fix when eliminate non-hatched eggs
### to account for empty vials
copepod = copepod %>%
select(-Sex) %>%
mutate(Clutch = Clutch + 8*(Population=="After")) %>%
mutate(Pop = recode(Population,
Before = 1,
After = 2)) %>%
mutate(Oil = recode(OilConc,
`0` = 1,
`75` = 2,
`100` = 3)) %>%
mutate(Group = 3*(Pop-1) + Oil) %>%
mutate(Vial = 6*(Clutch-1) + 2*(Oil-1) + (Replicate=="B") + 1) %>%
select(-Replicate)
copepod_hatched = copepod %>%
filter(Hatched==1) %>%
mutate(Vial = as.integer(as.factor(Vial)))
# create list of data for stan()
copepod_data = with(copepod_hatched,
list(N = nrow(copepod_hatched),
## P = length(unique(Pop)),
## T = length(unique(Oil)),
G = length(unique(Group)),
C = length(unique(Clutch)),
V = length(unique(Vial)),
## Pop = as.integer(Pop),
## Oil = as.integer(Oil),
Group = as.integer(Group),
Clutch = as.integer(Clutch),
Vial = as.integer(Vial),
Adult = as.integer(Adult)))
\[ \mathsf{P}(Y_i = 1) = \text{invlogit}(\eta_i) \]
\[ \text{invlogit}(x) = \frac{1}{1 + \mathrm{e}^{-x}} \]
\[ \eta_i = \alpha_{\text{Group}[i]} + \beta_{\text{Clutch}[i]} \]
\[ \alpha_j \sim \text{N}(0,\sigma_g) \]
\[ \beta_k \sim \text{N}(0,\sigma_c) \]
\[ \sigma_g, \sigma_c \sim \text{Uniform}(0,10) \]
functions
{
real invlogit(real x)
{
return ( 1 / ( 1 + exp(-x) ) );
}
}
data
{
int<lower=0> N; // number of eggs
int<lower=0> G; // number of groups (population*oil treatment)
int<lower=0> C; // number of clutches
// data
int<lower=1> Group[N];
int<lower=1> Clutch[N];
int<lower=0,upper=1> Adult[N];
}
parameters
{
real alpha[G]; // group effect
real beta[C]; // clutch effect
real<lower=0,upper=10> sigmaGroup;
real<lower=0,upper=10> sigmaClutch;
}
transformed parameters
{
// declarations
real eta[N];
// definitions
for ( i in 1:N )
eta[i] = alpha[Group[i]] + beta[Clutch[i]];
}
model
{
alpha ~ normal(0,sigmaGroup);
beta ~ normal(0,sigmaClutch);
Adult ~ bernoulli_logit(eta);
}
generated quantities {
// declarations
real diff0;
real diff75;
real diff100;
real p_before_0;
real p_before_75;
real p_before_100;
real p_after_0;
real p_after_75;
real p_after_100;
// definitions
p_before_0 = invlogit(alpha[1]);
p_before_75 = invlogit(alpha[2]);
p_before_100 = invlogit(alpha[3]);
p_after_0 = invlogit(alpha[4]);
p_after_75 = invlogit(alpha[5]);
p_after_100 = invlogit(alpha[6]);
diff0 = p_after_0 - p_before_0;
diff75 = p_after_75 - p_before_75;
diff100 = p_after_100 - p_before_100;
}
require(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
source("copepods-stan.R")
fit.stan = stan(file="copepods-no-vial.stan",
data=copepod_data,
pars=list("eta"),
include=FALSE)traceplot(fit.stan,"lp__")traceplot(fit.stan,"alpha")traceplot(fit.stan,"beta")traceplot(fit.stan,"diff0")traceplot(fit.stan,"diff75")traceplot(fit.stan,"diff100")options(width=120)
monitor(fit.stan, digits_summary=3)## Inference for the input samples (4 chains: each with iter=1000; warmup=0):
##
## mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
## alpha[1] 1.000 0.012 0.463 0.122 0.699 0.985 1.291 1.945 1381 1.002
## alpha[2] -0.409 0.012 0.468 -1.351 -0.713 -0.404 -0.100 0.503 1512 1.003
## alpha[3] -2.220 0.014 0.618 -3.499 -2.602 -2.202 -1.788 -1.116 1889 1.001
## alpha[4] 1.508 0.013 0.507 0.579 1.158 1.486 1.821 2.575 1530 1.001
## alpha[5] 1.651 0.013 0.504 0.732 1.311 1.621 1.973 2.709 1482 1.000
## alpha[6] -1.756 0.012 0.550 -2.886 -2.111 -1.725 -1.388 -0.724 1982 1.000
## beta[1] 0.571 0.013 0.607 -0.555 0.173 0.544 0.962 1.835 2161 1.001
## beta[2] -0.997 0.016 0.678 -2.464 -1.413 -0.956 -0.534 0.211 1706 1.001
## beta[3] -0.337 0.013 0.559 -1.508 -0.697 -0.318 0.048 0.723 1868 1.001
## beta[4] -0.355 0.013 0.574 -1.547 -0.708 -0.336 0.017 0.730 1904 1.000
## beta[5] -0.162 0.013 0.577 -1.317 -0.555 -0.156 0.230 0.941 1853 1.002
## beta[6] 0.356 0.013 0.602 -0.809 -0.038 0.347 0.743 1.592 2316 1.002
## beta[7] 0.496 0.013 0.587 -0.632 0.105 0.473 0.873 1.693 2155 1.000
## beta[8] -0.154 0.013 0.569 -1.277 -0.515 -0.145 0.212 0.974 2066 1.002
## beta[9] -0.987 0.015 0.604 -2.253 -1.367 -0.965 -0.575 0.119 1713 1.000
## beta[10] -0.598 0.014 0.611 -1.870 -0.975 -0.572 -0.182 0.530 1787 1.001
## beta[11] 0.567 0.012 0.622 -0.609 0.141 0.560 0.980 1.825 2566 1.000
## beta[12] -0.648 0.014 0.580 -1.872 -1.028 -0.627 -0.252 0.444 1797 1.000
## beta[13] -0.739 0.014 0.571 -1.937 -1.094 -0.712 -0.348 0.285 1720 1.001
## beta[14] -0.067 0.013 0.607 -1.283 -0.460 -0.058 0.319 1.126 2193 1.001
## beta[15] 1.527 0.020 0.831 0.130 0.912 1.457 2.077 3.310 1691 0.999
## beta[16] 1.463 0.020 0.844 0.051 0.872 1.375 1.950 3.430 1867 1.000
## sigmaGroup 2.081 0.025 0.909 1.006 1.483 1.875 2.429 4.387 1318 1.002
## sigmaClutch 1.030 0.012 0.356 0.456 0.787 0.989 1.227 1.795 816 1.000
## diff0 0.086 0.003 0.119 -0.142 0.008 0.082 0.161 0.327 1619 1.000
## diff75 0.424 0.003 0.128 0.165 0.337 0.426 0.513 0.666 1610 1.001
## diff100 0.049 0.002 0.092 -0.129 -0.011 0.048 0.106 0.242 1943 1.000
## p_before_0 0.722 0.002 0.089 0.530 0.668 0.728 0.784 0.875 1459 1.001
## p_before_75 0.404 0.003 0.107 0.206 0.329 0.400 0.475 0.623 1505 1.003
## p_before_100 0.111 0.001 0.058 0.029 0.069 0.100 0.143 0.247 1984 1.000
## p_after_0 0.807 0.002 0.074 0.641 0.761 0.815 0.861 0.929 1715 1.000
## p_after_75 0.828 0.002 0.068 0.675 0.788 0.835 0.878 0.938 1762 1.000
## p_after_100 0.160 0.002 0.071 0.053 0.108 0.151 0.200 0.326 2129 1.000
## lp__ -162.597 0.145 4.361 -172.009 -165.344 -162.301 -159.523 -155.008 911 1.001
##
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
plot(fit.stan,pars="alpha",ci_level=0.5)plot(fit.stan,pars="beta",ci_level=0.5)invlogit = function(x) return(1 / (1+exp(-x)))
alpha = rstan::extract(fit.stan,"alpha")
p.stan = apply(invlogit(alpha$alpha),2,mean)
# Compare to observed frequencies
p.observed = with(copepod_hatched, sapply(split(Adult,Group),mean) )
m = rbind(p.stan,p.observed)
colnames(m) = paste(rep(c("Before","After"),each=3),rep(c(0,75,100),2))
rownames(m) = c("Stan","Observed")
round(m,3)## Before 0 Before 75 Before 100 After 0 After 75 After 100
## Stan 0.722 0.404 0.111 0.807 0.828 0.16
## Observed 0.707 0.391 0.095 0.768 0.793 0.17
require(lme4)
fit.lmer = glmer( Adult ~ factor(OilConc)*Population + (1 | Clutch),
data=copepod_hatched,
family=binomial)
predictMatrix = rbind(c(1,0,0,0,0,0),
c(1,1,0,0,0,0),
c(1,0,1,0,0,0),
c(1,0,0,1,0,0),
c(1,1,0,1,1,0),
c(1,0,1,1,0,1))
p.lmer = as.vector( invlogit(predictMatrix %*% fixef(fit.lmer)) )[c(4:6,1:3)]m = rbind(p.observed,p.stan,p.lmer)
rownames(m) = c("Observed","Stan","Lmer")
colnames(m) = paste(rep(c("Before","After"),each=3),rep(c(0,75,100),2))
round(m, digits=3)## Before 0 Before 75 Before 100 After 0 After 75 After 100
## Observed 0.707 0.391 0.095 0.768 0.793 0.170
## Stan 0.722 0.404 0.111 0.807 0.828 0.160
## Lmer 0.719 0.383 0.084 0.824 0.843 0.154
d = extract(fit.stan,c("diff0","diff75","diff100"))
with(d, round(quantile(diff0,c(0.025,0.975)),3))## 2.5% 97.5%
## -0.142 0.327
with(d, round(quantile(diff75,c(0.025,0.975)),3))## 2.5% 97.5%
## 0.165 0.666
with(d, round(quantile(diff100,c(0.025,0.975)),3))## 2.5% 97.5%
## -0.129 0.242
Oil Concentration Before After Difference SE
0% 0.718 0.819 0.101 0.080
75% 0.379 0.843 0.465 0.095
100% 0.076 0.150 0.074 0.068
## 0.025 0.975
## 0% -0.05579712 0.2577971
## 75% 0.27780342 0.6501966
## 100% -0.05927755 0.2072776